#!/usr/bin/perl
# use strict;
# use warnings;
use Cwd;
use lib getcwd. "/bin/bioperl-1.5.2_102"; 
use Bio::TreeIO;

## VERSION 3.03 ## le fichier PANAM_Affiliation est écrasé en cas de relance / les fichiers PANAM_Affliation_<group> sont effacés / ce fichiers ouverts sont fermés
## VERSION 3.02 ## ajout d'une fonction de tri des barcodes ## JCC avril 2013 ##
## VERSION 3.01 ## construction des taxonomic_distribution par échantillons/méthodes/normalisation ## JCC février 2013 ##

my ($USAGE) =  "\n\n\t****** USAGE of panam.pl PROGRAM ******\n\n\n\n\tUSAGE: perl $0 <panam.ini file> \n\n\n\n";
die "$USAGE" if ((scalar(@ARGV))< 1);
my $option_file = $ARGV[0];
chomp($option_file);

die "\n\n\t=> Cannot find configuration file: $option_file.\n\n" unless (-e $option_file);
die "\n\n\t=> Configuration file, $option_file, appears to be empty!\n\n" if (-z $option_file);
open(OFILE, "<$option_file") || die "Cannot open input file : $option_file\n";

my $usearch;
my $NGS_id_Results;
my $panam_output = "panam_output";
# my $parsing;
my $query_seq; 
my $user_file;
my $preprocess_output = "preprocess_output";
my $seq_F;
my $seq_R;
my $primF;
my $primR;
my $trim =0;
my $currdir = `pwd`;
chomp($currdir);
my $path = $currdir."/bin/uclust3.0.617";
print $path."\n";
my $us_version = "3.0.617";
####
my $fast = $currdir."/bin/./FastTree";
my $align = $currdir."/bin/./hmmer-2.3.2/src/hmmalign";
my $Reference;
my $build = $currdir."/bin/./hmmer-2.3.2/src/hmmbuild";


####
my $dom;
my %dom;
my $aff_norm; 
## VERSION 3.02 ##
my $nbrBar; my $ficBar;
my $barIn; my $barcode;
#### V3.02

while (<OFILE>) {
	chomp;
	my $line = $_;

# discard blank line
	if ($line =~ m/^\s*$/g) {
		next;
	}

# discard comment line
	elsif($line =~ m/^\s*#/g) {
		next;
	}
	
# get input options line
        else {
		
		if($line=~m/454_RUN_IDENTIFIER/gi) { 
			$line=~s/454_RUN_IDENTIFIER//gi;
			$line=~s/\t//gi;
			$line=~s/^\s+//gi;
			$line=~s/\s+$//gi;
			if($line) {
				$NGS_id_Results = $line; 
			}
	 		die "\n\tOutput directory for NGS analyses is not defined. Check panam.ini.\n\n" unless ( $NGS_id_Results ne "");
			unless (-d $NGS_id_Results) { mkdir $NGS_id_Results || die "\n\tCould not create Directory $NGS_id_Results $!\n"; }
		}

 		if (-d $NGS_id_Results."/".$panam_output) { `rm -r $NGS_id_Results/$panam_output` } 
		if (!(-d $NGS_id_Results."/".$panam_output)) { mkdir $NGS_id_Results."/".$panam_output || die "\n\tCould not create Directory $NGS_id_Results/$panam_output $!\n"; }


		if($line=~m/QUERY_SEQUENCES/gi) {
			$line=~s/QUERY_SEQUENCES//gi;
			$line=~s/\t//gi;
			$line=~s/^\s+//gi;
			$line=~s/\s+$//gi; 
			if($line) {
				$query_seq = $line; 
			}
			chomp ($query_seq);
			# See if the query sequences file exist and contains something
			die "\n\n\tCannot find query sequence file: $query_seq. Check panam.ini.\n\n" unless (-e $query_seq);
			die "\n\n\tQuery sequence file, $query_seq, appears to be empty!\n\n" if (-z $query_seq);
			
			my $ligne1= `sed -n '1p' $query_seq`;
			my $ligne2= `sed -n '2p' $query_seq`;	
# 			if (($ligne1 !~ /^>/) or ($ligne2 !~ /^[ATCGU]/)) { 
			if (($ligne1 !~ /^>/) or ($ligne2 !~ /^[ATCGUatcgu]/)) {  ###### modif 24/5
				die "\n\n\tYour file $query_seq does not seem to contain sequences in fasta format.\n\n";
			}		
		}

		## VERSION 3.02 ##
		if($line=~m/BARCODE_FILE/gi) { 
			$line=~s/BARCODE_FILE//gi; ;
			$line=~s/\t//gi; 
			$line=~s/^\s+//gi; 
			$line=~s/\s+$//gi; 
			if($line) {
				$barIn = $line; #print "$barIn\n";
			}

	
			if (!(defined $barIn ) or ($barIn eq "")) {
				$barcode =0;
				$nbrBar = 0;
				########ajout 13/5
				$ficBar = "no barcode file" ;
				#############fin ajout 13/5
			}
			elsif ((defined $barIn) and ($barIn ne "")){
				$barcode = 1;
				##########" ajout 13/5
				$ficBar = $barIn;
				########fin ajout 13/5     

				# See if the barcode file exist and contains something
				die "\n\n\tCannot find barcode file: $barIn. Check panam.ini.\n\n" unless (-e $barIn);
				die "\n\n\tBarcode file, $barIn, appears to be empty!\n\n" if (-z $barIn);

				open (BB, $barIn);
				my @barIN= <BB>;
				close BB;
				foreach my $e (@barIN) {
					if ($e !~ /\t/) {die "\n\n\tYour bar code file $barIn does not seem to be in the right format.\n\n";}
					my @ty = split(/\t/, $e);
					if (($ty[0] =~ /^\d/) or ($ty[0]=~ /_/) or ($ty[0]=~ /\//)) { die "\n\n\tThe bar code Ids in $barIn should start with no number and contain no underscorses.\n\n";}
					if ($ty[1] !~ /[ATCGU]/i) {die "\n\n\tYour bar code file $barIn does not seem to be in the right format.\n\n";}
				}
				############### fin modif + ajout

				my $nbr = `wc -l $barIn`;
				if($nbr =~ /(\d.?)\s/) { $nbrBar = $1} #### modif 25/5 w --> d 
			}
		}
		#### V3.02

		if($line=~m/USER_FILE/gi) {
			$line=~s/USER_FILE//gi;
			$line=~s/\t//gi;
			$line=~s/^\s+//gi;
			$line=~s/\s+$//gi;
			if($line) {
				$user_file = $line; 
			}
			die "\n\n\t User file value missed. Check panam.ini.\n\n" unless (($user_file ne ""));
			die "\n\n\t Invalid user file value!\n\n" unless (($user_file eq "own") or ($user_file eq "pooledSamples") or ($user_file eq "eachSample"));	
		}

		if($line=~m/FORWARD_PRIMER_NAME/gi) {
			$line=~s/FORWARD_PRIMER_NAME//gi;
			$line=~s/\t//gi;
			$line=~s/^\s+//gi;
			$line=~s/\s+$//gi;
			if($line) {
				$primF = $line; 
			}
		}
	
		if($line=~m/REVERSE_PRIMER_NAME/gi) {
			$line=~s/REVERSE_PRIMER_NAME//gi;
			$line=~s/\t//gi;
			$line=~s/^\s+//gi;
			$line=~s/\s+$//gi;
			if($line) {
				$primR = $line; 
			}
		}

		if($line=~m/FORWARD_PRIMER_SEQUENCE/gi) {
			$line=~s/FORWARD_PRIMER_SEQUENCE//gi;
			$line=~s/\t//gi;
			$line=~s/^\s+//gi;
			$line=~s/\s+$//gi;
			if($line) {
				$seq_F = $line; 
			}
		}
	
		if($line=~m/REVERSE_PRIMER_SEQUENCE/gi) {
			$line=~s/REVERSE_PRIMER_SEQUENCE//gi;
			$line=~s/\t//gi;
			$line=~s/^\s+//gi;
			$line=~s/\s+$//gi;
			if($line) {
				$seq_R = $line;
			}
		}
			
		###################" choix de la base
		if($line=~m/REFERENCE_BASE/gi) {
			$line=~s/REFERENCE_BASE//gi;
			$line=~s/\t//gi;
			$line=~s/^\s+//gi;
			$line=~s/\s+$//gi;
			if($line) {
				$Reference = $line; 
			}
		}

		############### normalisation
		if($line=~m/NBR_SEQ_NORM/gi) {
			$line=~s/NBR_SEQ_NORM//gi;
			$line=~s/\t//gi;
			$line=~s/^\s+//gi;
			$line=~s/\s+$//gi;
			if($line) {
				$aff_norm = $line; #print "aff --- $aff_norm\n";
			} 
		}
		
		#####################################

		if($line=~m/DOMAIN/gi) {
			$line=~s/DOMAIN//gi;
			$line=~s/\t//gi;
			$line=~s/^\s+//gi;
			$line=~s/\s+$//gi;
			if($line) { 
				$dom = $line;
				if (defined $dom ) { #push (@dom, $dom);
					$dom{$dom} = 1
				}
			}
			#foreach my $d (@dom) {
			foreach my $d (keys %dom) {
				die "\n\n\t Domain name is not correct. Check panam.ini.\n\n" unless (($d eq "bacteria") or ($d eq "eukaryota") or ($d eq "archaea"));
			}	
		}
 	}
}
die "\n\n\t Invalid number of sequences to pick for the normalization! Check panam.ini.\n\n" if ((defined $aff_norm ) and ($aff_norm !~ /^\d+$/));
if (!(defined $aff_norm)) {$aff_norm = '200' }


## VERSION 3.02 ##
##################### Ajout JCC avril 2013 : by sample order

my $socat = `cat -n $barIn | awk -F"\t" '{ print\$1"\t"\$2 }'`;
my @socat = split("\n", $socat);
my %so = ();

foreach my $line (@socat) {
    my ($val, $key) = split("\t", $line);
    $so{$key} = $val;
}

#########################################################
#### V3.02

print "$Reference\n";


########################### ajout position

my $forw_regexp; my $reve_regexp;
my @tab_forw=split(//,$seq_F);
$forw_regexp=regexp(@tab_forw);

my $rev=reverse($seq_R);
my @tab_reve=split(//,$rev);
$reve_regexp=regexp(@tab_reve);
$reve_regexp=~tr/ACGURMBD/UGCAYKVH/;

sub regexp{
	my $regexp;
	foreach (@_){
		if ($_ eq "T"){
			$regexp.="U";
		}
		elsif ($_ eq "R"){
			$regexp.="[AGR]";
		}
		elsif ($_ eq "Y"){
			$regexp.="[CUY]";
		}
		elsif ($_ eq "M"){
			$regexp.="[CAM]";
		}
		elsif ($_ eq "K"){
			$regexp.="[UGK]";
		}
		elsif ($_ eq "W"){
			$regexp.="[UAW]";
		}
		elsif ($_ eq "S"){
			$regexp.="[CGS]";
		}
		elsif ($_ eq "B"){
			$regexp.="[CUGB]";
		}
		elsif ($_ eq "D"){
			$regexp.="[AUGD]";
		}
		elsif ($_ eq "H"){
			$regexp.="[AUCH]";
		}
		elsif ($_ eq "V"){
			$regexp.="[ACGV]";
		}
		elsif ($_ eq "N"){
			$regexp.="[ACGUN]";
		}
		else{
			$regexp.=$_;
		}
	}
	return $regexp;
}

############################################# trimming profiles ######################################################

if (!(defined $primF) and (defined $primR))  {
	die "\n\n\t Primer forward not defined. Check panam.ini.\n\n";
}

if (defined $primF) {
	die "\n\n\t Primer reverse not defined. Check panam.ini.\n\n" unless (defined $primR);
	die "\n\n\t Primers' sequences missed. Check panam.ini.\n\n" unless ((defined $seq_F) and (defined $seq_R));
	die "\n\n\t Primers' sequences seem to be in the wrong format. Check panam.ini.\n\n" unless (($seq_F =~ /[ATCGUatcgu]/) and ($seq_R =~ /[ATCGUatcgu]/));
	$trim = 1
}

my @tax=();
my $Profiles;
if ($trim == 1) {
	$Profiles = $Reference."/Reference_all/Profiles_".$primF."_".$primR;
	if ((-d $Profiles)) {
		print "Trimmed profiles exist in $Profiles/\n";

	}
	
	else {
		print "\n\tTrimming profiles ...\n"; 

		###############"""récuperation des positions
		my %list_profils;
		my $acces; 
		my $taxo;
# 		my @tax = ();
		my $rep = $Reference."/Reference_all/Profiles/fasta/";
		my @listefic = <$rep*.fasta>;
			
		my $pos_forward; my $pos_reverse;
			
		foreach my $nn (@listefic) { 
			#foreach my $domain (@dom){
			foreach my $domain (keys %dom) {
				if ($nn =~ /$domain/) { 
					if ($nn=~ /$Reference\/Reference_all\/Profiles\/fasta\/(.*?).fasta/) { $taxo = $1 ; push (@tax, $taxo); }	
					open (my $profils,"<".$Reference."/Reference_all/Profiles/fasta/$taxo.fasta") || die "can not open file";
					while (<$profils>){
						chomp $_;
						if ($_=~/^>(.*)$/){
							$acces=$1;
						}
						else{
							$_=~s/ //g;
							$list_profils{$taxo}{$acces}.=$_;
						}
					}
				close $profils;
				}
			}
		}
			
		my $pos_reverse_som=0; my $pos_forward_som=0; my $if =0; my $ir=0; 
		foreach my $taxo (@tax){
			my $pos_forward;my $pos_reverse;
			foreach my $acces (keys %{$list_profils{$taxo}}){
				my $sequence=$list_profils{$taxo}{$acces};
		
				$sequence=~s/-//g;
				$sequence=~s/\.//g;
		
				if ($sequence=~/$forw_regexp/){
					$if++;
					$pos_forward=length($`) ;
					$pos_forward_som+=$pos_forward;
				}
				if ($sequence=~/$reve_regexp/){
					$ir++;
					$pos_reverse=length($`) + length($&);
					$pos_reverse_som+= $pos_reverse;
				}
						
				if (defined($pos_reverse) && defined($pos_forward)){
					last;	
				}
			}
		}
			
		my $posF = int ($pos_forward_som/$if); my $posR = int ($pos_reverse_som/$ir);

		#############fin recuperation des positions

		trim ($primF, $primR, $posF, $posR);
		print "\n\tTrimmed profiles have been generated in ".$Profiles."\n\n";
	}
}

sub trim {
	my $posF; my $posR; my $primF; my $primR; my $fasta_file; my $hmm_file;
	($primF, $primR, $posF, $posR) = @_;

	my $nom = $primF."_".$primR ;
	
	`mkdir $Reference"/Reference_all/Profiles_"$nom/`;
	`mkdir $Reference"/Reference_all/Profiles_"$nom/hmmprofil/`;
	`mkdir $Reference"/Reference_all/Profiles_"$nom/fasta/`;
	
	open (C, $Reference."/seq_model_trim") || die "cannot open file";
	my %corresp;
	while (<C>) {
		my @tab = split (/\t/, $_);
		chomp ($tab[0]); chomp ($tab[1]);
		$corresp{$tab[0]} = $tab[1];
	}
	close C;
	
	foreach my $n (@tax) {
		open (F, $Reference."/Reference_all/Profiles/fasta/".$n.".fasta") || die "can not open file";
		my %seq ; my $a;
		while (<F>) {
			if ($_=~ />(.*?)\s+/ ) {
				$a = $1;
			}
			else {
				chomp($_);
				$seq{$a}.= $_;
			}
		}
		close F;
		
		my $seq = $corresp{$n};
		
		my @tab = split (//, $seq{$seq}) ;
		
		my $pos_reel =1;
		my $pos_nuc =1;
		my $pos_nuc_fin;
		
		my $posF_gaped;
		my $posR_gaped;
		
		foreach my $e (@tab) {
		
			if (($e ne "-") and ($e ne ".")) {
				$pos_nuc ++;
			}
			if ($pos_nuc == $posF) {
				$posF_gaped = $pos_reel;	
			}
			
			elsif ($pos_nuc == $posR) {
				$posR_gaped = $pos_reel;	
				last;
			}
	
			$pos_reel++;
		}
			
		if (!(defined $posR_gaped)) {$posR_gaped = $pos_reel}
			
		my $length = $posR_gaped - $posF_gaped;
		
		open (A, ">".$Reference."/Reference_all/Profiles_".$nom."/fasta/".$n.".fasta") || die "can not open file"; 
		foreach my $k (keys %seq) {
			my $sub = substr ($seq{$k}, $posF_gaped, $length);
			if ($sub=~ /[ATCGUatcgu]/) {
				print A ">$k\n$sub\n"
			}
		}
		
		close A;
			
		$fasta_file = $Reference."/Reference_all/Profiles_".$nom."/fasta/".$n.".fasta" ;
		`$build -g $Reference"/Reference_all/Profiles_"$nom"/hmmprofil/$n".hmmprofil $fasta_file` ;
			
		$hmm_file = $Reference."/Reference_all/Profiles_".$nom."/hmmprofil/".$n.".hmmprofil"
	}
	return ($fasta_file , $hmm_file)
}
 
if ($trim == 0) {
	$Profiles= $Reference."/Reference_all/Profiles"
}

########### hash query sequences 
my %h; my $c;
open (F, $query_seq);
while (<F>){
	if ($_ =~ /^>(.*?)\n/) {
		$c=$1;
		$c =~ s/\s+$//; 
		
	}
	else {
		chomp ($c);
		$h{$c}.=$_ ;
	}
}
close F;

########### hash tax
my %taxeuk;
open (T, $Reference."/Taxonomy") || die "can not open file";
while (<T>) {
	my @e = split (/\t/, $_);
	chomp($e[0]); chomp ($e[1]); chomp ($e[2]);
	$taxeuk{$e[0]}=$e[1]."\t".$e[2];
}

#################################################
########### hash reference sequences + uclust
`mkdir $NGS_id_Results/$panam_output"/Similarity_annotation"` ;
`$path --sort $query_seq --output $NGS_id_Results/$panam_output/Similarity_annotation/seq_sorted_seed 2>&1` ; 

print "usearch ...\n";

my $repp = $Reference."/Reference_all/Reference_bases/";
my $e; my %seq;
# 
######################################"""""""
#####comparer aux bases seed

my $rep_seed = $Reference."/Reference_all/Reference_bases/seed_bases/";
my @listeseed = <$rep_seed*_seed> ;
my %seed_affil;
foreach (@listeseed) {
	my $seed_file = $_;

	if ($seed_file =~ /\/Reference_all\/Reference_bases\/seed_bases\/(.*?)_seed/ ){
		my $seed = $1;
		print "seed --- $seed\n";
		
		if (($us_version eq "1.1.579q") or ($us_version eq "3.0.617")) {
			`$path --input $NGS_id_Results/$panam_output/Similarity_annotation/seq_sorted_seed --lib $seed_file --uc $NGS_id_Results/$panam_output/Similarity_annotation/seq_results_seed_"$seed".uc --id 0.10 --libonly --allhits --maxaccepts 1 --rev `;
		}

		else {
			`$path --query $NGS_id_Results/$panam_output/Similarity_annotation/seq_sorted_seed --db $seed_file --uc $NGS_id_Results/$panam_output/Similarity_annotation/seq_results_seed_"$seed".uc  --id 0.10 --maxaccepts 1 --rev --local 2>&1`
		}	

		open (KK, $NGS_id_Results."/".$panam_output."/Similarity_annotation/seq_results_seed_".$seed.".uc") || die "can not open file" ;
		while (<KK>) {
			if (!($_=~ /\#/)) {
				my @t = split (/\t/, $_);
				if ($t[0] eq "H") {
					chomp($t[8]); chomp($t[3]); 
					$seed_affil{$t[8]}{$seed} = $t[3];
				}
			}
		} 
		close KK;		
	}
}

my %prim_affil ;
foreach my $u (keys %seed_affil) {
	my $max = 0;
	foreach my $uu (keys %{$seed_affil{$u}}) {
		if ($seed_affil{$u}{$uu} > $max) { 
			$max = $seed_affil{$u}{$uu}	; 		
			$prim_affil{$u} = $uu ;
		}
	}
}			

#open (DISC, ">".$NGS_id_Results."/".$panam_output."/Similarity_annotation/discarded.fasta") || die "can not open file" ;
foreach my $k (keys %prim_affil) {
	if (!(exists $dom{$prim_affil{$k}})) {
		#print DISC ">$k\n$h{$k}";
		delete ($h{$k});
	}
}
#close DISC;

open (QUERY, ">".$NGS_id_Results."/".$panam_output."/Similarity_annotation/query_seq") || die "can not open file" ;
foreach my $mm (keys %h) {
	print QUERY ">$mm\n$h{$mm}"
}		
close QUERY;

`$path --sort $NGS_id_Results/$panam_output/Similarity_annotation/query_seq --output $NGS_id_Results/$panam_output/Similarity_annotation/seq_sorted 2>&1` ; 
###################################################"" fin comparaison aux seed bases

my @listeficc = <$repp*.fasta>;
my $num_uc=1;
foreach(@listeficc) { 
	my $ficTemp = $_; 
	#foreach my $domain (@dom){	
	foreach my $domain (keys %dom) {
		if ($ficTemp =~ /$domain/) {

		#################################################
			if (($us_version eq "1.1.579q") or ($us_version eq "3.0.617")) {
				`$path --input $NGS_id_Results/$panam_output/Similarity_annotation/seq_sorted --lib $ficTemp --uc $NGS_id_Results/$panam_output/Similarity_annotation/seq_results_"$num_uc".uc --id 0.10 --libonly --allhits --maxaccepts 5 --rev `;
			}

			else {
				`$path --query $NGS_id_Results/$panam_output/Similarity_annotation/seq_sorted --db $ficTemp --uc $NGS_id_Results/$panam_output/Similarity_annotation/seq_results_"$num_uc".uc  --id 0.10 --maxaccepts 5 --rev --local 2>&1`
			}
		##################################################


			`cat $NGS_id_Results/$panam_output/Similarity_annotation/seq_results_"$num_uc".uc >> $NGS_id_Results/$panam_output/Similarity_annotation/seq_results.uc` ;

			$num_uc++;
			open(Q, "$ficTemp");
			while (<Q>){
				if ($_ =~ /^>(.*?)\s+/) {
					$e=$1;
				}
				else {
					chomp ($_);
					$seq{$e}.=$_ ;
				}
			}
			close Q ;
		}
	}
}


####################### sorting sequences
 
if (!(-z $NGS_id_Results."/".$panam_output."/Similarity_annotation/seq_results.uc")) {
	print L "usearch done\n";
}

#################le premier tableau @tax n'existe pas toujours, si les profiles existent déjà, @tax n'est pas créé. 
print "Sorting sequences ...\n"; 
my @tax = ();
open (IL, $Reference."/sortingtax");
@tax=<IL>;
close IL;

print "Step 1 read $NGS_id_Results.\"/\".$panam_output.\"/Similarity_annotation/seq_results.uc\"\n";

open (F, $NGS_id_Results."/".$panam_output."/Similarity_annotation/seq_results.uc") || die "can not open file $NGS_id_Results/$panam_output/Similarity_annotation/seq_results.uc" ;
open (B, ">".$NGS_id_Results."/".$panam_output."/Similarity_annotation/best_hit_uc");
my %hits; my %target_hits;
while (<F>) {
	if (!($_=~ /\#/)) {
		my @t = split (/\t/, $_);
		if ($t[0] eq "H") {
			chomp ($t[8]) ,chomp($t[9]); chomp ($t[3]) ;
			$t[8] =~ s/\s+$//; $t[9] =~ s/\s+$//; $t[3] =~ s/\s+$//;
			$target_hits{$t[8]}{$t[9]} = $t[3];
			#print "$target_hits{$t[8]}{$t[9]} \n";
			if ($t[4] eq "-") {
				my $rev = reverse ($h{$t[8]});
				$rev =~ tr/ACGTUacgtu/TGCAAtgcaa/ ; ##### modif 24/5
				chomp ($rev);
				$h{$t[8]} =$rev;
			}
		}
	}
}

print "Step 2\n";

my %tax_hits; my %seq_hits;
foreach my $query (keys %target_hits) {
	my $nb_target=0;
	foreach my $target (sort {$target_hits{$query}{$b} <=> $target_hits{$query}{$a}} keys %{$target_hits{$query}}) {
		my $ligne_taxo = $taxeuk{$target};
		my @t = split (/\t/, $ligne_taxo);
		print B "$query\t$target\t$t[0]\t$t[1]\n";
		$tax_hits{$query}{$nb_target} = $t[1];
		$seq_hits{$query}{$nb_target} = $target;
		$nb_target++;
		if ($nb_target == 5) {last}
	}
	#print "$nb_target \n";
}

close B;

print "Step 3\n";

`mkdir $NGS_id_Results/$panam_output"/Similarity_annotation/Sorted_Sequences"`;
foreach my $tax (@tax) { 
	print "DEBUG: $tax ---- $Profiles\n";
	chomp ($tax);
	my %tax_bis;
	if (-e $Profiles."/fasta/".$tax.".fasta") { 
		open (T, $Profiles."/fasta/".$tax.".fasta") || die "can not open file"; 
		my $aa; my %seq_ref;
		while (<T>) {
			if ($_=~ />(.*?)\s+/) {
				$aa = $1;
				chomp ($aa);
				$seq_ref{$aa}=1;
			}
		}
		close T;
		
		if (!(-d $NGS_id_Results."/".$panam_output."/Similarity_annotation/Sorted_Sequences/files_".$tax)) {
			`mkdir $NGS_id_Results/$panam_output"/Similarity_annotation/Sorted_Sequences/files_"$tax`;
		}
		print "DEBUG: ===============================\n";
		`ls $NGS_id_Results/$panam_output"/Similarity_annotation/Sorted_Sequences/"`;
		open (TAX, ">".$NGS_id_Results."/".$panam_output."/Similarity_annotation/Sorted_Sequences/files_".$tax."/seq_".$tax);
		
		my $debug = 0;
		foreach my $seq_c (keys %seq_hits) {
# 			if ($tax_hits{$seq_c}{'0'} =~ /$tax/) {
			if ($tax =~ /$tax_hits{$seq_c}{'0'}/) {
				print TAX ">$seq_c\n$h{$seq_c}\n";
		
				my %aze;
				foreach my $e (keys %{$seq_hits{$seq_c}}) {
					$aze{$seq_hits{$seq_c}{$e}}= 1
					
				}
	
				foreach my $uu (keys %seq_ref) {
					delete ($aze{$uu})
				}
	
				foreach my $qs (keys %aze) {
					$tax_bis{$qs} = 1;
				}
				$debug++;
			}
		}
		print "DEBUG: $debug\n";
		
		foreach my $ww (keys %tax_bis) {
			print TAX ">$ww\n$seq{$ww}\n";
		}
		close TAX;
	}
}

print "Step 4\n";

# 	
###################################### cutting ...

my $max=0;
foreach my $e (@tax) { 
	my $aa; my %hh; my $i= 0; my $num=1; 
	if (-e $NGS_id_Results."/".$panam_output."/Similarity_annotation/Sorted_Sequences/files_".$e."/seq_".$e) {

		open (AA,  $NGS_id_Results."/".$panam_output."/Similarity_annotation/Sorted_Sequences/files_".$e."/seq_".$e);
		while (<AA>) {
			if ($_=~/^(>.*?)\s/) {
				$aa = $1;
			}
			else {
				$hh{$aa}.=$_;
			}	
		}
		
		open (P, ">". $NGS_id_Results."/".$panam_output."/Similarity_annotation/Sorted_Sequences/files_".$e."/file_cut_".$num);

		foreach my $ee (keys %hh) {
			$i++;
			if ($i != 27001) {
 				print P "$ee\n$hh{$ee}\n";
			}
			else  {
				close P;
				$num++;
				open (P, ">". $NGS_id_Results."/".$panam_output."/Similarity_annotation/Sorted_Sequences/files_".$e."/file_cut_".$num);
				print P "$ee\n$hh{$ee}\n";
				$i= 1;
				
			}
		}
		if ($num > $max) { $max = $num; }
		close P;
	}		
}

print "Step 5\n";

my @num = (1 .. $max);

# #################################""
 
`mkdir $NGS_id_Results/$panam_output"/Phylogeny"` ;
`mkdir $NGS_id_Results/$panam_output"/Alignment"`;
`mkdir $NGS_id_Results/$panam_output"/Phylogeny/Node_files"`;


################## changé /!\ 28/3/2012
open (AFFIL, ">".$NGS_id_Results."/PANAM_Affiliation.txt");		## VERSION 3.03 ## ">>" changed to ">" 

###############"ajout /!\ fichiers clades 28/3/2012
open (CLADENN, ">".$NGS_id_Results."/PANAM_Clades_NN.txt") || die "can not open file";
open (CLADELCA, ">".$NGS_id_Results."/PANAM_Clades_LCA.txt") || die "can not open file";
 my %taxOTU; my %leveltax; my %sample;
################" fin ajout fichiers clades

##########ajout 24/5
my %seq_affil;
######################" fin ajout 24/5

foreach my $kk (@tax) {
	foreach my $fff (@num) {
#########Alignment
		if (-e $NGS_id_Results."/".$panam_output."/Similarity_annotation/Sorted_Sequences/files_".$kk."/file_cut_".$fff) { 
			my $ca = $align." -o ".$NGS_id_Results."/".$panam_output."/Alignment/".$kk.".".$fff.".fasta --outformat a2m --mapali ".$Profiles."/fasta/".$kk.".fasta ".$Profiles."/hmmprofil/".$kk.".hmmprofil ". $NGS_id_Results."/".$panam_output."/Similarity_annotation/Sorted_Sequences/files_".$kk."/file_cut_".$fff;	
			print "$ca\n";
			system "$ca 2>&1"  ;
			print L "Alignment files_$kk.$fff done\n"	
		}
	
#######Phylogeny	
		if (-e $NGS_id_Results."/".$panam_output."/Alignment/".$kk.".".$fff.".fasta") {
			my $ct = $fast." -nt -boot 100 ".$NGS_id_Results."/".$panam_output."/Alignment/".$kk.".".$fff.".fasta > ".$NGS_id_Results."/".$panam_output."/Phylogeny/".$kk.".".$fff.".newick";
			print "$ct\n";
			system "$ct " ;
			print L "Phylogeny $kk.$fff done\n";
		}

########Rooting tree
		if ( -e $NGS_id_Results."/".$panam_output."/Phylogeny/".$kk.".".$fff.".newick"){
			open (FF , $NGS_id_Results."/".$panam_output."/Phylogeny/".$kk.".".$fff.".newick") ;
	
			my $tree="";
			my $id_externe = "9999";
			while (<FF>) {
				$tree.=$_
			}
		
			my $n_tree;
			if ($tree=~ /(^\(.*\(AX409584:.*?,DD141196:.*?\))(.*?)(:.*)/) {
				$n_tree = "$1".$id_externe."$3";
			}

			elsif ($tree=~ /(^\(.*\(DD141196:.*?,\(AX409584:.*?\).*?\))(.*?)(:.*)/){
				$n_tree = "$1".$id_externe."$3";
			}

			elsif ($tree=~ /(^\(.*\(DD141196:.*?,AX409584:.*?\))(.*?)(:.*)/) {
				$n_tree = "$1".$id_externe."$3";
			}

			elsif ($tree=~ /(^\(.*\(AX409584:.*?,\(DD141196:.*?\).*?\))(.*?)(:.*)/) {
				$n_tree = "$1".$id_externe."$3";
			}	

			else { 
				$n_tree = $tree;
				$id_externe = "DD141196"
			}

			open (FFF,">".$NGS_id_Results."/".$panam_output."/n_tree");
			print FFF "$n_tree";
			close FFF;
	
			my $data = $NGS_id_Results."/".$panam_output."/n_tree";
			my $res = $NGS_id_Results."/".$panam_output."/Phylogeny/".$kk.".".$fff."_rooted.newick";
	
	
			my $in = Bio::TreeIO->new(-format => 'newick', -file => $data);
	
			my $out = Bio::TreeIO->new(-format => 'newick', -file => ">$res");
	
			while( my $t = $in->next_tree ){
				my ($a) = $t->find_node(-id =>"9999");
				$t->reroot($a);
				$out->write_tree($t);
			}
		}
	
# 		`rm $NGS_id_Results/$panam_output/n_tree`;

#############Nodes_file
		if ( -e $NGS_id_Results."/".$panam_output."/Phylogeny/".$kk.".".$fff."_rooted.newick"){
	
			my $nom_fichier = $NGS_id_Results."/".$panam_output."/Phylogeny/".$kk.".".$fff."_rooted.newick"  ;
			open (E, ">".$NGS_id_Results."/".$panam_output."/Phylogeny/Node_files/file_get_descendent_seq_".$kk.".".$fff);
	
			my $input = new Bio::TreeIO -> new(-file   => $nom_fichier,
				-format => "newick");
			
			while (my $tree = $input->next_tree) {
				for my $node ( grep { ! $_->is_Leaf } $tree->get_nodes) {
					print E" Node : ", $node->id ;
					for my $child ( $node -> get_Descendents ) {
						print E" child : ", $child->id ;
					}
					print E"\n";
				}
			}
			close E;
		}
		########################### parsing
		if ( -e $NGS_id_Results."/".$panam_output."/Phylogeny/Node_files/file_get_descendent_seq_".$kk.".".$fff){
			open (W, $NGS_id_Results."/".$panam_output."/Phylogeny/Node_files/file_get_descendent_seq_".$kk.".".$fff) ;
			my $i=0;
			my %hh; my %child; my %node_seq; my %boot;
			while (<W>) {
				if ($_=~ /Node :\s(.*?)\schild/) { 
					my $a = "Node_".$i; 
					my $boot_value = $1;
					my $child = ($_=~ tr/child//);
					$child{$a}= $child; 
					$child{$a}{'bootstrap'} = $boot_value;
	
					$boot{$a}= $boot_value;
					my @tab = split (/:/, $_);
					foreach my $e (@tab) { 
						my @t = split (/ /,$e);
						foreach my $p (@t) { 
							my $pp; 
							if (($p=~ /^\w/) and ($p !~ /\./)) { 
								chomp ($p);
								if (($p !~ /Node/) and ($p !~ /child/)){
									$pp = $p;
								}
								if (defined $pp ) { 
									$node_seq{$pp} .="\t".$a; 
									my @tax_seq = split(/\t/, $taxeuk{$pp});
									my $tax_seq = $tax_seq[0];
									if (defined ($tax_seq)) {
										chomp ($tax_seq); 
										$hh{$a}{'tax'}.="\t".$tax_seq;
										$hh{$a}{'seq'}.="\t".$pp;		
									}	
								}
							}
						}
					}
				}
				$i++;
			}
	
	##############"NN	
			my %nn_node;
			foreach my $nn (keys %hh) {
				my @mmm = split (/\t/, $hh{$nn}{'tax'});
				if ($mmm[1] ne "") {
					$nn_node{$nn} = $mmm[1]
				}
			}
	
	###################"LCA
			my %taxo;
			foreach my $p (keys %hh) {	
				my @ttax=();
				my @tab = split (/\t/,$hh{$p}{'tax'}); 
				my $pp = $#tab;
				
			##### toutes les taxonomies reliées à un noeud sont stockées dans @t; 
		
				for (my $i=0; $i<=$pp; $i++) {
					if ($tab[$i] ne "") {
						push(@ttax, $tab[$i]);
					}
				}
				$taxo{$p}{'tax'} = \@ttax;
			}
	
			##########################

			my %taille_taxo;
			foreach my $bb (keys %taxo) {
				my $lmin_silva=100000; 
		
				my $mm = $#{$taxo{$bb}{'tax'}};
				my @r; my @rr; my $nnn ; my $nn;
				
				for(my $i=0; $i<=$mm; $i++) {
					my @r = split(/;/, ${$taxo{$bb}{'tax'}}[$i]);
					my $nn = $#r;
					if (($nn >=0) and ($nn < $lmin_silva)) {
						$lmin_silva = $nn;
					}
				}
					
				$taille_taxo{$bb}{'tax'} = $lmin_silva;
			}

			my %term; my %taxo_com; my $qq; my @tt; my %rang_taxo;
				
			foreach my $p (keys %taxo) {
				my $qs = $#{$taxo{$p}{'tax'}}; 
				for (my $oo =0; $oo<= $qs; $oo++) { 
					my @t = split (/;/, ${$taxo{$p}{'tax'}}[$oo]); 
					for (my $i = 0; $i <= $taille_taxo{$p}{'tax'}; $i++) {
						if (exists $t[$i]) { #print "ti --- $t[$i]\n";
							$term{$p}{'tax'}{$i}.="\t".$t[$i];
						}
					}	
				}
			}
	
			my %tax_node;
			foreach my $tt (keys %term) { 
				foreach my $cc (keys %{$term{$tt}}) { 
					$tax_node{$tt}{$cc}=""; 
					my %temp=%{$term{$tt}{$cc}};
					my @liste_level=sort keys(%temp);
					foreach(@liste_level){
						my $ss=$_; 
						my %rang_tax;		
						if (defined $term{$tt}{$cc}{$ss}) {
							my @ff = split(/\t/, $term{$tt}{$cc}{$ss});
							my $t_max = $#ff; 
							foreach my $ll (@ff) {
								$rang_tax{$ll}++;
							} 
				
							foreach my $aa (keys %rang_tax) {
								if ($aa ne "") { 
									if ($rang_tax{$aa} == $t_max) { 
										$tax_node{$tt}{$cc}.=$aa.";";
									}
								}
							}
						}
					}
				}
			} 
	
			################ printing results 
			open (R, ">".$NGS_id_Results."/PANAM_Affiliation".$kk."_".$fff.".txt");
			if (-e $NGS_id_Results."/".$panam_output."/Similarity_annotation/Sorted_Sequences/files_".$kk."/file_cut_".$fff) {
				open (G, $NGS_id_Results."/".$panam_output."/Similarity_annotation/Sorted_Sequences/files_".$kk."/file_cut_".$fff)  ;
				my %nom_seq;
				while (<G>) {	
					if ($_=~ /^>(.*?)\s+/) {
						my $aa = $1; 
						$nom_seq{$aa} = 1;
					}
				}

				foreach my $e (keys %nom_seq) { 
				##############"ajout 24/5	
#					$seq_affil{$e}=1;
				###########fin ajout 24/5  
					if (exists $h{$e}) {
						my $min = 1000000;
						my $res_node;
						my @tab = split (/\t/, $node_seq{$e});
						foreach my $a (@tab) {
							if ($a ne "") {
								if (($child{$a} < $min) and defined ($nn_node{$a}) ) {
									$min = $child{$a}; 
									$res_node = $a; 
								}
							}
						}
	
						my @mmm = split (/\t/,$hh{$res_node}{'seq'});
						my $nearest = $mmm[1];
						my @ttax_nearest = split(/\t/,$taxeuk{$nearest});
						my $tax_nearest	= $ttax_nearest[0];
						
						my $tax_node;
						if ($tax_node{$res_node}{'tax'} eq "") { #print "res_node --- $res_node\n"; 
							$tax_node = "Other;"}	
						else { $tax_node = $tax_node{$res_node}{'tax'}	 }
				
						########## print dans 2 fichiers : le final AFFIL et le provisoir $k

						my @ppp = split (/;/, $tax_nearest);
						my $ppp = $#ppp;
						if ($ppp == 1) {
							$tax_nearest .="unclassified ".$ppp[1].";"
						}
						
						my @ooo = split (/;/, $tax_node);
						my $ooo = $#ooo;
						if ($ooo == 1) { 
							$tax_node .="unclassified ".$ooo[1].";" ; 
						}

						print R ">Read: $e\tBootstrap:$boot{$res_node}\nNearest neighbor: $nearest\t$tax_nearest\nLowest node: $res_node\t$tax_node\n\n";				
						print AFFIL ">Read: $e\tBootstrap:$boot{$res_node}\nNearest neighbor: $nearest\t$tax_nearest\nLowest node: $res_node\t$tax_node\n\n";

					}
				}
			}
			close R;

#####################################################################

			if (-e $NGS_id_Results."/PANAM_Affiliation".$kk."_".$fff.".txt") { 
				open (AA, $NGS_id_Results."/PANAM_Affiliation".$kk."_".$fff.".txt");
	
	 			my $c; my %hhh; my $bb;  my %bootstrap; 
				while (<AA>) {	
					if ($_=~ /^>Read: (.*?)_/) {
						my $aa = $1;
						$sample{$aa}=1;
					}
					if ($_=~ /^>Read: (.*?)\t(.*?)\n/) {
						$c = $1; 
						$bootstrap{$c} = $2;
					}
					else {
						chomp($_);
						$hhh{$c}.= $_
					}
				}
				close AA; 
# 			}

				my %clade;my %nbr; my %tax; my %boot;

				foreach my $f (keys %hhh) { 
					if (($hhh{$f} !~ /Metazoa/) and ($hhh{$f} !~ /Other/)) {
						$bb = $bootstrap{$f};
						my @tt = split (/Lowest node:/, $hhh{$f}) ;
						chomp ($tt[0]); chomp ($tt[1]);
			
						################"" %taxOTU pour user_file = pooled et %leveltax pour user_file = eachSample
			
						my @uu = split (/\t/, $tt[1]);
						my $node = $uu[0]; 
		
						$boot{'node'}{$node} = $bb;
						$clade{'node'}{$node}.=$f.", ";
						$nbr{'node'}{$node}++;
						$tax{'node'}{$node} =  $uu[1];
			
						if ((defined $uu[1])  and ($uu[1] ne "")) {
							$taxOTU{$f}{'LCA'} = $uu[1]; 
							chomp($uu[1]); 

							$uu[1]=~s/\t//gi;
							$uu[1]=~s/^\s+//gi;
							$uu[1]=~s/\s+$//gi;

							my @aqs = split (/;/, $uu[1]);
							my $aqs = $#aqs; 
	
							if ($aqs == 0) { }
				
							else {
								$leveltax{'LCA'}{$aqs[1]}{$aqs[2]}++
							}
						}
			
						my @u = split (/\t/, $tt[0]);
						if ((defined $u[1])  and ($u[1] ne "")) {
							$taxOTU{$f}{'NN'} = $u[1];
							chomp($u[1]); 
					
							my @bqs = split (/;/, $u[1]); 
							my $bqs = $#bqs;

							if ($bqs == 0) { }
							else {
								$leveltax{'NN'}{$bqs[1]}{$bqs[2]}++
							}
						}

		###########################################"

						my @t = split (/Nearest neighbor:/, $tt[0]);
				
						my @proc = split (/\t/, $t[1]); 
						my $voisin = $proc[0]; 
			
						$boot{'voisin'}{$voisin} = $bb;
						$tax{'voisin'}{$voisin}=$proc[1]."\n\t\t";
						$clade{'voisin'}{$voisin}.=$f.", ";
						$nbr{'voisin'}{$voisin}++;	
					}
				}

				foreach my $p (keys %{$clade{'voisin'}}) {
					chomp ($tax{'voisin'}{$p}); 
					if ($nbr{'voisin'}{$p} == 1 ) {
						print  CLADENN ">$p\t$tax{'voisin'}{$p}Phylogeny:  ".$kk."_".$fff."_rooted.newick\t".$boot{'voisin'}{$p}."\n\n\t".$nbr{'voisin'}{$p}." neighbor: ".$clade{'voisin'}{$p}."\n\n";					
					}
					else {
						print  CLADENN ">$p\t$tax{'voisin'}{$p}Phylogeny: ".$kk."_".$fff."_rooted.newick\t".$boot{'voisin'}{$p}."\n\n\t".$nbr{'voisin'}{$p}." neighbors: ".$clade{'voisin'}{$p}."\n\n";
					}
				}

				foreach my $p (keys %{$clade{'node'}}) { 
					chomp ($tax{'node'}{$p}); 
					if ($nbr{'node'}{$p} == 1 ) {
						print  CLADELCA ">$p\t$tax{'node'}{$p}\n\t\tPhylogeny:  ".$kk."_".$fff."_rooted.newick\t".$boot{'node'}{$p}."\n\n\t".$nbr{'node'}{$p}." Descendent: ".$clade{'node'}{$p}."\n\n";
					}
					else {
						print  CLADELCA ">$p\t$tax{'node'}{$p}\n\t\tPhylogeny:  ".$kk."_".$fff."_rooted.newick\t".$boot{'node'}{$p}."\n\n\t".$nbr{'node'}{$p}." Descendents : ".$clade{'node'}{$p}."\n\n";
					}
				}

			}
			`rm $NGS_id_Results/PANAM_Affiliation$kk"_"$fff.txt`;			## VERSION 3.03 ##
		}
	}
}
close AFFIL; 			## VERSION 3.03 ## 
close CLADENN;			## VERSION 3.03 ## 
close CLADELCA;			## VERSION 3.03 ## 

print "Step 6\n";

################################ eliminer les seq qui n'appartiennent pas aux groupes d'intérêt

my @dom_enlv = ("eukaryota", "bacteria" , "archaea" , "Metazoa", "Other");

foreach my $k (keys %taxOTU) { #print "k -- $k\n";
	foreach my $aff (keys %{$taxOTU{$k}}) { 
		foreach my $d (@dom_enlv) { 
			if (exists $dom{$d}) {}
			else {
				my @tab = split(/;/, $taxOTU{$k}{$aff}) ;
				#if ($taxOTU{$k}{$aff} =~ /$d/i) {  #print "$aff--- $k\t"; print "tax -- $taxOTU{$k}{$aff}\n";
				if ($tab[1] eq $d ) {
					delete ($taxOTU{$k}{$aff})
				}
			}
		}
	}
}
#################################
##### récupérer les OTUs avec seed, children et nombre ; selon le type d'analyse (Pooled ou eachSample)
##### récupérer $nbrSeq pour calcul des indices

my %nbrSeq;
my %seed; my %child;  
my %barIn;
my %NbrSeqClean;

if ($user_file eq "pooledSamples") { 
	open (FF, $NGS_id_Results."/".$preprocess_output."/pooled_sample/pooled_sample_OTU") || die "can not open file";
	#my %barIn;
	my %hps; 
	my $l;
	while (<FF>) {
		if ($_=~ /^OTU/) {}
		else {
			chomp ($_);
			my @tab = split (/\t/, $_);

			############################################ !!!!!!!!!!!
			if (exists $taxOTU{$tab[1]}{'LCA'}) {

				my $id = $tab[0]."\t".$tab[1];
				$hps{$id}{'id'} = $tab[1];
				my @tt = split (/, /, $tab[3]);			
				
				foreach my $e (@tt) {
					my $d;
					if ($e=~ /(\w.*?)_/) { 
						$d = $1; 
						########
					 	$NbrSeqClean{$d}++;
						#####
						$barIn{$d} = 1; 
						$hps{$id}{$d}++;
						$nbrSeq{$hps{$id}{'id'}}{$d}{'pooled'}++ ;
					}
				}	
		
				my $u = scalar(@tt);
				$u-= 1;
				my @t = @tt[0..$u];

				$child{$tab[1]} = \@t;
				foreach my $e (@t) {	
					if (defined $e) {
						$seed{$e} = $tab[1];
					}
				}
			} 
			else { delete $taxOTU{$tab[1]} }	### éliminer les seq qui n'ont pas été affiliées phylogénétiquement
		}
	}
	close FF;

	########################" OTU_distribution_taxo
		
	open (REPS, ">".$NGS_id_Results."/OTU_distribution_tax.txt");
 	print REPS "OTU_Id\tOTU_Seed\t";
	foreach my $pp ( (sort {$so{$a} <=> $so{$b}} keys %barIn)) { 									## VERSION 3.02 ##
	#foreach my $pp ( (sort {$barIn{$b} <=> $barIn{$a}} keys %barIn)) { 
##		print REPS "$pp ($NbrSeqClean{$pp})\t";
		print REPS "$pp\t";
	}
	print REPS "NN taxonomy\tLCA taxonomy\n"; 
		
	foreach my $p (keys %hps) {
		print REPS "$p\t";
		foreach my $pp ( (sort {$so{$a} <=> $so{$b}} keys %barIn)) { 								## VERSION 3.02 ##
		#foreach my $pp ((sort {$barIn{$b} <=> $barIn{$a}} keys %barIn)) {	
	 		if (defined $hps{$p}{$pp}) {
				print REPS"$hps{$p}{$pp}\t";
			}
			else {
				print REPS"0\t"
			}
		}
		print REPS "$taxOTU{$hps{$p}{'id'}}{'NN'}\t$taxOTU{$hps{$p}{'id'}}{'LCA'}\n" ;
	}
	close REPS;
}


if ($user_file eq "eachSample") {
	foreach my $s (keys %sample) { #print "s --- $s\n";
		open (FFF, $NGS_id_Results."/".$preprocess_output."/".$s."_OTU") || die "can not open file" ;
		while (<FFF>) {
			if ($_=~ /^OTU/) {}
			else {
				my @tab = split (/\t/, $_);
	
				############################################ !!!!!!!!!!!  /!\ NN ou LCA  dans le test d'existence ????
				if (exists $taxOTU{$tab[1]}{'LCA'}) {
					###### $nbrSeq
					$nbrSeq{$tab[1]}{$s}{'each'} = $tab[2]; #print "$s -- $tab[1] -- $tab[2]\n";
					#########			
				
					my @tt = split (/, /, $tab[3]);
					my $u = scalar(@tt);
					$u-= 1;
					my @t = @tt[0..$u];
					$child{$tab[1]} = \@t;
					foreach my $e (@t) {
						if (defined $e) {
							$seed{$e} = $tab[1]
						}
					}
				}else { delete $taxOTU{$tab[1]} }	### éliminer les seq qui n'ont pas été affiliées phylogénétiquement
			}
		}
		close FFF;
	}
}


##################################################################
	
my %seq_clean;					
foreach my $k (keys %taxOTU) { 
	foreach my $aff (keys %{$taxOTU{$k}}) { 	
		foreach my $e (@{$child{$k}}) {		
			if ((defined $e) and ($e ne "")) {
				my @oo = split (/_/,$e);			
				foreach my $s (%sample) { 										
					if ($oo[0] eq $s){ 										
						push (@{$seq_clean{$aff}{$s}}, $e) ;
					}
				}	
			}	
		}
	}
}

%child = (); 														## VERSION 3.01 ##

my %hash; my %min_norm;
my %min_norm_env;

#if (defined $aff_norm) {
	#print "aff --- $aff_norm\n";
$min_norm{'NN'} = $aff_norm;
$min_norm{'LCA'} = $aff_norm;

foreach my $aff (keys %seq_clean) {
	foreach my $ss(keys %{$seq_clean{$aff}}) {											# $ss = env 
		my $m = scalar (@{$seq_clean{$aff}{$ss}});
		if ($m >= $aff_norm) {
			$min_norm_env{$ss}=1;
		}
		foreach my $g (@{$seq_clean{$aff}{$ss}}) { 
			$hash{$ss}{$aff}{$g} = 1;
		}
	}
}
#}	

#else {
#	foreach my $aff (keys %seq_clean) {
#		$min_norm{$aff} = 1000000;
#		foreach my $ss(keys %{$seq_clean{$aff}}) {
#			my $m = scalar (@{$seq_clean{$aff}{$ss}});
#			if ( ($m < $min_norm{$aff}) and ($m > 10)) {
#				$min_norm{$aff} = $m
#			}
#			foreach my $g (@{$seq_clean{$aff}{$ss}}) { 
#				$hash{$ss}{$aff}{$g} = 1;
#			}
#		}	
#		print "mm $aff --- $min_norm{$aff}\n";
#	}	
#}

%seq_clean = ();													## VERSION 3.01 ##


##### sélectionner $min_norm séq dans chaque sample ####
my %subsample;
foreach my $k (keys %hash) { #print "k --- $k\n";										# $k = $env
	foreach my $aff (keys %{$hash{$k}}) { 
		my $sub=0;

		my $keys = keys(%{$hash{$k}{$aff}});		
		if ($keys >= $min_norm{$aff}){ #### éliminer les ech avec un nbr de seq < le seuil de normalisation
			foreach my $kk (keys %{$hash{$k}{$aff}}) { 
				$subsample{$k}{$aff}{$kk}=1;
				$sub++;
				if ($sub == $min_norm{$aff}) { last }
			}
		}
	}
}
#######

%hash = (); %min_norm = ();												## VERSION 3.01 ##		

my %d;
my %synt;
foreach my $k (keys %subsample) { 											# $k = $env
	foreach my $aff (keys %{$subsample{$k}}) {  
		foreach my $kk (keys %{$subsample{$k}{$aff}}) {
			my $aa = $seed{$kk};
			$synt{$k}{$aff}{$aa}{'reads'} .= $kk.",";
			$synt{$k}{$aff}{$aa}{'som'}++ ;
			$d{$aff}{$aa}= 1;
		}
	}
}	

%subsample = (); %seed = ();

my %distr; 
foreach my $v (keys %synt) {												# $v = $env
	foreach my $aff (keys %{$synt{$v}}) {
		foreach my $k (keys %{$d{$aff}} ) {
			if (defined $synt{$v}{$aff}{$k}{'som'}) {
				$distr{$k}{$aff}{$v} = $synt{$v}{$aff}{$k}{'som'}
			}
			else {
				$distr{$k}{$aff}{$v} = 0
			}
		}
	}
}

%d = (); 														## VERSION 3.01 ##		

################### OTU_distribution_tax normalisé
if ($user_file eq "pooledSamples") { 
	open (ODNN, ">".$NGS_id_Results."/OTU_distribution_tax_normalized_NN.txt");
	open (ODNL, ">".$NGS_id_Results."/OTU_distribution_tax_normalized_LCA.txt");
	print ODNN "OTU_Seed\t"; print ODNL "OTU_Seed\t";
 
	foreach my $pp ( (sort {$so{$a} <=> $so{$b}} keys %synt)) { 								## VERSION 3.02 ##
	#foreach my $pp ( (sort {$synt{$b} <=> $synt{$a}} keys %synt)) { 
	#foreach my $pp ( (sort {$barIn{$b} <=> $barIn{$a}} keys %barIn)) {
		print ODNN "$pp\t" ;
		print ODNL "$pp\t" ;
	}
	print ODNN "NN taxonomy\n"; 
	print ODNL "LCA taxonomy\n"; 

	#### Print data
	foreach my $k (keys %distr) {
		print ODNL "$k\t"; print ODNN "$k\t";
		#foreach my $p ((sort {$barIn{$b} <=> $barIn{$a}} keys %barIn)) {
		foreach my $p ( (sort {$so{$a} <=> $so{$b}} keys %synt)) { 								## VERSION 3.02 ##
		#foreach my $p ((sort {$synt{$b} <=> $synt{$a}} keys %synt)) {
			if (exists ($synt{$p})) {
			print ODNL "$distr{$k}{'LCA'}{$p}\t" ;
			print ODNN "$distr{$k}{'NN'}{$p}\t"
			}
			else {
			print ODNL "NA\t" ;
			print ODNN "NA\t"
			}
		}
		print ODNL "$taxOTU{$k}{'LCA'}\n";
		print ODNN "$taxOTU{$k}{'NN'}\n";
	}

	close ODNN;
	close ODNL;
}

%distr = ();  														## VERSION 3.01 ##		

	################################################ hashs pour calcul des indices et printing
	#########################################""

###################################################################################### BOUCLER sur les $env (%sample)

foreach my $env ( (sort {$so{$a} <=> $so{$b}} keys %barIn)) {								## VERSION 3.01 ## VERSION 3.02 ## 
	chomp($env);															## VERSION 3.01 ##
	
	my %res;
	my %res_norm;															#if ($normaliz eq "yes") {
	#foreach my $env (keys %synt) {													# $v = $env
		foreach my $aff (%{$synt{$env}}) {
			foreach my $Id_OTU (%{$synt{$env}{$aff}}) {

				my @bqs = split (/;/, $taxOTU{$Id_OTU}{$aff});

				if ($#bqs == 0) {
					$res_norm{$aff}{$env}{$bqs[0]}{'nbrSEQ'}+= $synt{$env}{$aff}{$Id_OTU}{'som'};
					$res_norm{$aff}{$env}{$bqs[0]}{'OTU'}{$Id_OTU} = $synt{$env}{$aff}{$Id_OTU}{'som'}	;
				
					if ($synt{$env}{$aff}{$Id_OTU}{'som'} != 0) {
						$res_norm{$aff}{$env}{$bqs[0]}{'nbrOTU'}++;
					}
					if ($synt{$env}{$aff}{$Id_OTU}{'som'} == 1) {
						$res_norm{$aff}{$env}{$bqs[0]}{'1'}++;
					}
					if ($synt{$env}{$aff}{$Id_OTU}{'som'} == 2) {
						$res_norm{$aff}{$env}{$bqs[0]}{'2'}++;
					}
				}	

				else { 
					$res_norm{$aff}{$env}{$bqs[1]}{$bqs[2]}{'nbrSEQ'}+= $synt{$env}{$aff}{$Id_OTU}{'som'};
					$res_norm{$aff}{$env}{$bqs[1]}{$bqs[2]}{'OTU'}{$Id_OTU} =$synt{$env}{$aff}{$Id_OTU}{'som'};

					$res_norm{$aff}{$env}{$bqs[1]}{'nbrSEQ'}+= $synt{$env}{$aff}{$Id_OTU}{'som'};
					$res_norm{$aff}{$env}{$bqs[1]}{'OTU'}{$Id_OTU}= $synt{$env}{$aff}{$Id_OTU}{'som'};

					###
					if ($synt{$env}{$aff}{$Id_OTU}{'som'} != 0) {						
						$res_norm{$aff}{$env}{$bqs[1]}{$bqs[2]}{'nbrOTU'}++;
						$res_norm{$aff}{$env}{$bqs[1]}{'nbrOTU'}++;
					}

					if ($synt{$env}{$aff}{$Id_OTU}{'som'} == 1) {					
						$res_norm{$aff}{$env}{$bqs[1]}{$bqs[2]}{'1'}++;
						$res_norm{$aff}{$env}{$bqs[1]}{'1'}++				
					}
					if ($synt{$env}{$aff}{$Id_OTU}{'som'} == 2) {
						$res_norm{$aff}{$env}{$bqs[1]}{$bqs[2]}{'2'}++;
						$res_norm{$aff}{$env}{$bqs[1]}{'2'}++
					}
				}
			}
		}
	#}

########################### no normalizing

#if ($normaliz eq "no" ){
	# foreach my $env (keys %barIn) {																													# $env = $env
	foreach my $l (keys %taxOTU) { 
		my $nbrSeq; 
		if ($user_file eq "eachSample") {
			$nbrSeq = $nbrSeq{$l}{$env}{'each'};
		}
		if ($user_file eq "pooledSamples") { 
			$nbrSeq = $nbrSeq{$l}{$env}{'pooled'};
		}

		#print "$l --- nbrSeq --- $nbrSeq\n";
		################## parsing method ############## 

		foreach my $aff (keys %{$taxOTU{$l}} ) { 

			my @aqs = split (/;/, $taxOTU{$l}{$aff});
	
			if ($#aqs == 0 ) {
				$res{$aff}{$env}{$aqs[0]}{'nbrSEQ'}+= $nbrSeq;
				$res{$aff}{$env}{$aqs[0]}{'OTU'}{$l} = $nbrSeq	;
				if ($nbrSeq != 0) {
					$res{$aff}{$env}{$aqs[0]}{'nbrOTU'}++;
				}
				if ($nbrSeq == 1) {
					$res{$aff}{$env}{$aqs[0]}{'1'}++;
				}
				if ($nbrSeq == 2) {
					$res{$aff}{$env}{$aqs[0]}{'2'}++;
				}
			}	
			else { 
				$res{$aff}{$env}{$aqs[1]}{$aqs[2]}{'nbrSEQ'}+= $nbrSeq;
				$res{$aff}{$env}{$aqs[1]}{$aqs[2]}{'OTU'}{$l} = $nbrSeq;
				###
				$res{$aff}{$env}{$aqs[1]}{'nbrSEQ'}+= $nbrSeq;
				$res{$aff}{$env}{$aqs[1]}{'OTU'}{$l}= $nbrSeq;
				###
				if ($nbrSeq != 0) {
					$res{$aff}{$env}{$aqs[1]}{$aqs[2]}{'nbrOTU'}++;
					$res{$aff}{$env}{$aqs[1]}{'nbrOTU'}++;
				}
				if ($nbrSeq == 1) {
					$res{$aff}{$env}{$aqs[1]}{$aqs[2]}{'1'}++;
					$res{$aff}{$env}{$aqs[1]}{'1'}++;
				}
				if ($nbrSeq == 2) {
					$res{$aff}{$env}{$aqs[1]}{$aqs[2]}{'2'}++;
					$res{$aff}{$env}{$aqs[1]}{'2'}++;
				}
			}
		}
	}
	#}
#}

	######################################################### pas de normalisation ##################
	###################################### calcul 
	foreach my $parsing (keys %res) {
		foreach my $ss (keys %{$res{$parsing}}) { 
			foreach my $a (keys %{$res{$parsing}{$ss}}) { 
		
				my $nv2_n1; my $nv2_n2;

				################## calcul de schao
				
				$nv2_n1 = $res{$parsing}{$ss}{$a}{'1'};  $nv2_n2 = $res{$parsing}{$ss}{$a}{'2'};
			
				if ((($nv2_n1 >0) and ($nv2_n2 >= 0)) or (($nv2_n1==0 ) and ($nv2_n2 == 0))) {
					$res{$parsing}{$ss}{$a}{'chao'} =  $res{$parsing}{$ss}{$a}{'nbrOTU'} + (($nv2_n1*($nv2_n1 - 1))/(2*($nv2_n2+1)));
					$res{$parsing}{$ss}{$a}{'chao'} = sprintf("%.2f", $res{$parsing}{$ss}{$a}{'chao'});
				}
				else {
					$res{$parsing}{$ss}{$a}{'chao'} = $res{$parsing}{$ss}{$a}{'nbrOTU'} + (($nv2_n1*$nv2_n1)/(2*$nv2_n2));
					$res{$parsing}{$ss}{$a}{'chao'} = sprintf("%.2f", $res{$parsing}{$ss}{$a}{'chao'});
				}
		
				############### calcul de Coverage
				my $cov;
				if ($res{$parsing}{$ss}{$a}{'nbrSEQ'} != 0 ) {	
					$cov = 1- ($nv2_n1/$res{$parsing}{$ss}{$a}{'nbrSEQ'});
					$cov = $cov * 100;
					$res{$parsing}{$ss}{$a}{'cov'} =  sprintf("%.2f", $cov);
				}
				else {
					$res{$parsing}{$ss}{$a}{'cov'} =0
				}
		
			########################################"nv3

				foreach my $aa (keys %{$leveltax{$parsing}{$a}}) { 
					my $nv3_n1; my $nv3_n2;
		
				    ##################### calcul de schao
					$nv3_n1 = $res{$parsing}{$ss}{$a}{$aa}{'1'};  
					$nv3_n2 = $res{$parsing}{$ss}{$a}{$aa}{'2'};
									
					if ((($nv3_n1 >0) and ($nv3_n2 >= 0)) or (($nv3_n1==0 ) and ($nv3_n2 == 0))) {
						$res{$parsing}{$ss}{$a}{$aa}{'chao'} =  $res{$parsing}{$ss}{$a}{$aa}{'nbrOTU'} + (($nv3_n1*($nv3_n1 - 1))/(2*($nv3_n2+1)));
						$res{$parsing}{$ss}{$a}{$aa}{'chao'} = sprintf("%.2f", $res{$parsing}{$ss}{$a}{$aa}{'chao'});
					}
					else {
						$res{$parsing}{$ss}{$a}{$aa}{'chao'} = $res{$parsing}{$ss}{$a}{$aa}{'nbrOTU'} + (($nv3_n1*$nv3_n1)/(2*$nv3_n2));
						$res{$parsing}{$ss}{$a}{$aa}{'chao'} = sprintf("%.2f", $res{$parsing}{$ss}{$a}{$aa}{'chao'});
					}
			
				    ################## calcul de Coverage
					my $cov;
					if ($res{$parsing}{$ss}{$a}{$aa}{'nbrSEQ'} != 0 ) {	
						$cov = 1- ($nv3_n1/$res{$parsing}{$ss}{$a}{$aa}{'nbrSEQ'});
						$cov = $cov * 100;
						$res{$parsing}{$ss}{$a}{$aa}{'cov'} =  sprintf("%.2f", $cov);
					}
					else {
						$res{$parsing}{$ss}{$a}{$aa}{'cov'} =0;
					}
				}
			}
		}
	}

	###################################### printing
	foreach my $parsing (sort keys %leveltax) { 												# $parsing = LCA ou NN
		
		open (TAX, ">".$NGS_id_Results."/grp_tax".$parsing."_".$env."_tmp_all");
		print TAX "\n\t";
		
		foreach my $pp (sort keys %{$leveltax{$parsing}}) {
			if (($pp ne "") and ($pp ne ";")) {	
				print TAX "\n$pp\t";
				foreach my $ppp (sort keys %{$leveltax{$parsing}{$pp}}) {
					if (($ppp ne "") and ($ppp ne ";") and ($pp ne "Metazoa") and ($pp ne "Other")) {
						print TAX "\n\t$ppp";
					}
				}
			}
		}
		
		close TAX;

		my $o = $env;
				
		# foreach my $o (keys %{$res{$parsing}}) { 											## VERSION 3.01 ##
			#if ($o ne "") {
				open (OO, ">".$NGS_id_Results."/res_".$o."_".$parsing."_tmp_all");
				print OO "\t\t\t\t\t$o\n";
				print OO "\t#seq\t#OTUs\tSchao1\tShannon\tCoverage\n";
						
				foreach my $j (sort keys %{$leveltax{$parsing}}) { 
					if ((defined $j) and ($j ne "") and ($j ne ";")) {
						my $t2; my $sh=0; 
						if (exists $res{$parsing}{$o}{$j}{'nbrOTU'}) {
							foreach my $e (keys %{$res{$parsing}{$o}{$j}{'OTU'}}) {
								if ($res{$parsing}{$o}{$j}{'OTU'}{$e} != 0) {
									$t2=$res{$parsing}{$o}{$j}{'OTU'}{$e}/$res{$parsing}{$o}{$j}{'nbrSEQ'};
									$t2 = $t2 * log($t2);
									$sh += $t2;
								}
							}
				
							$sh = $sh*(-1);
							$res{$parsing}{$o}{$j}{'shannon'} = sprintf("%.2f", $sh);
							print OO "\t$res{$parsing}{$o}{$j}{'nbrSEQ'}\t$res{$parsing}{$o}{$j}{'nbrOTU'}\t$res{$parsing}{$o}{$j}{'chao'}\t$res{$parsing}{$o}{$j}{'shannon'}\t$res{$parsing}{$o}{$j}{'cov'}\n";
						}
							
						else {
							print OO "\t0\t0\t0\t0\t0\n";
						}
					}
					foreach my $jj (sort keys %{$leveltax{$parsing}{$j}}) { 
						my $sh=0; my $t2;
						if (exists $res{$parsing}{$o}{$j}{$jj}{'nbrOTU'}) {
							foreach my $e (keys %{$res{$parsing}{$o}{$j}{$jj}{'OTU'}}) {
								if ($res{$parsing}{$o}{$j}{$jj}{'OTU'}{$e} != 0) {
									$t2=$res{$parsing}{$o}{$j}{$jj}{'OTU'}{$e}/$res{$parsing}{$o}{$j}{$jj}{'nbrSEQ'};
									$t2 = $t2 * log($t2);
									$sh += $t2;
								}
							}
										
							$sh = $sh*(-1);
							$res{$parsing}{$o}{$j}{$jj}{'shannon'} = sprintf("%.2f", $sh);
		
							print OO "\t$res{$parsing}{$o}{$j}{$jj}{'nbrSEQ'}\t$res{$parsing}{$o}{$j}{$jj}{'nbrOTU'}\t$res{$parsing}{$o}{$j}{$jj}{'chao'}\t$res{$parsing}{$o}{$j}{$jj}{'shannon'}\t$res{$parsing}{$o}{$j}{$jj}{'cov'}\n";
						}
						else {
							print OO "\t0\t0\t0\t0\t0\n";
						}
					}	
				}
				close OO;
			#}
			`paste $NGS_id_Results"/grp_tax"$parsing"_"$o"_tmp_all" $NGS_id_Results"/res_"$o"_"$parsing"_tmp_all" > $NGS_id_Results/taxonomic_distribution_"$o"_"$parsing".txt`;

		#}																	## VERSION 3.01 ##

	## 		`paste  $NGS_id_Results"/"*"_tmp_all" > $NGS_id_Results/taxonomic_distribution_"$parsing".txt`;

		`rm $NGS_id_Results/*_tmp_*`;
	}
	
	%res = (); 																	## VERSION 3.01 ##
	 
	###############################################################


	######################################################### normalisation ##################
	###################################### calcul 
	foreach my $parsing (keys %res_norm) { 
		foreach my $ss (keys %{$res_norm{$parsing}}) { 
			foreach my $a (keys %{$res_norm{$parsing}{$ss}}) { 
		
				my $nv2_n1; my $nv2_n2;

				################## calcul de schao
				
				$nv2_n1 = $res_norm{$parsing}{$ss}{$a}{'1'};  $nv2_n2 = $res_norm{$parsing}{$ss}{$a}{'2'};
			
				if ((($nv2_n1 >0) and ($nv2_n2 >= 0)) or (($nv2_n1==0 ) and ($nv2_n2 == 0))) {
					$res_norm{$parsing}{$ss}{$a}{'chao'} =  $res_norm{$parsing}{$ss}{$a}{'nbrOTU'} + (($nv2_n1*($nv2_n1 - 1))/(2*($nv2_n2+1)));
					$res_norm{$parsing}{$ss}{$a}{'chao'} = sprintf("%.2f", $res_norm{$parsing}{$ss}{$a}{'chao'});
				}
				else {
					$res_norm{$parsing}{$ss}{$a}{'chao'} = $res_norm{$parsing}{$ss}{$a}{'nbrOTU'} + (($nv2_n1*$nv2_n1)/(2*$nv2_n2));
					$res_norm{$parsing}{$ss}{$a}{'chao'} = sprintf("%.2f", $res_norm{$parsing}{$ss}{$a}{'chao'});
				}
		
				############### calcul de Coverage
				my $cov;
				if ($res_norm{$parsing}{$ss}{$a}{'nbrSEQ'} != 0 ) {	
					$cov = 1- ($nv2_n1/$res_norm{$parsing}{$ss}{$a}{'nbrSEQ'});
					$cov = $cov * 100;
					$res_norm{$parsing}{$ss}{$a}{'cov'} =  sprintf("%.2f", $cov);
				}
				else {
					$res_norm{$parsing}{$ss}{$a}{'cov'} =0
				}
		
			########################################"nv3

				foreach my $aa (keys %{$leveltax{$parsing}{$a}}) { 
					my $nv3_n1; my $nv3_n2;
		
				    ##################### calcul de schao
					$nv3_n1 = $res_norm{$parsing}{$ss}{$a}{$aa}{'1'};  
					$nv3_n2 = $res_norm{$parsing}{$ss}{$a}{$aa}{'2'};
									
					if ((($nv3_n1 >0) and ($nv3_n2 >= 0)) or (($nv3_n1==0 ) and ($nv3_n2 == 0))) {
						$res_norm{$parsing}{$ss}{$a}{$aa}{'chao'} =  $res_norm{$parsing}{$ss}{$a}{$aa}{'nbrOTU'} + (($nv3_n1*($nv3_n1 - 1))/(2*($nv3_n2+1)));
						$res_norm{$parsing}{$ss}{$a}{$aa}{'chao'} = sprintf("%.2f", $res_norm{$parsing}{$ss}{$a}{$aa}{'chao'});
					}
					else {
						$res_norm{$parsing}{$ss}{$a}{$aa}{'chao'} = $res_norm{$parsing}{$ss}{$a}{$aa}{'nbrOTU'} + (($nv3_n1*$nv3_n1)/(2*$nv3_n2));
						$res_norm{$parsing}{$ss}{$a}{$aa}{'chao'} = sprintf("%.2f", $res_norm{$parsing}{$ss}{$a}{$aa}{'chao'});
					}
			
				    ################## calcul de Coverage
					my $cov;
					if ($res_norm{$parsing}{$ss}{$a}{$aa}{'nbrSEQ'} != 0 ) {	
						$cov = 1- ($nv3_n1/$res_norm{$parsing}{$ss}{$a}{$aa}{'nbrSEQ'});
						$cov = $cov * 100;
						$res_norm{$parsing}{$ss}{$a}{$aa}{'cov'} =  sprintf("%.2f", $cov);
					}
					else {
						$res_norm{$parsing}{$ss}{$a}{$aa}{'cov'} =0
					}
				}
			}
		}
	}

	###################################### printing
	foreach my $parsing (sort keys %leveltax) {
		open (TAX, ">".$NGS_id_Results."/grp_tax".$parsing."_".$env."_norm_tmp_all");
		print TAX"\n\t";
		
		foreach my $pp (sort keys %{$leveltax{$parsing}}) {
			if (($pp ne "") and ($pp ne ";")) {	
				print TAX "\n$pp\t";
				foreach my $ppp (sort keys %{$leveltax{$parsing}{$pp}}) {
					if (($ppp ne "") and ($ppp ne ";") and ($pp ne "Metazoa") and ($pp ne "Other")) {
						print TAX "\n\t$ppp";
					}
				}
			}
		}
		
		close TAX;
		
		#foreach my $o (keys %{$res_norm{$parsing}}) {
		#foreach my $o (keys %barIn) { 
		my $o = $env;
			#if ($o ne "") {	
				open (OO, ">".$NGS_id_Results."/res_".$env."_".$parsing."_norm_tmp_all");
				print OO "\t\t\t\t\t$o\n";
				print OO "\t#seq\t#OTUs\tSchao1\tShannon\tCoverage\n";
						
				foreach my $j (sort keys %{$leveltax{$parsing}}) { 
					if ((defined $j) and ($j ne "") and ($j ne ";")) {
						my $t2; my $sh=0; 
						######
						if (exists $res_norm{$parsing}{$o}) {
						#####
							if (exists $res_norm{$parsing}{$o}{$j}{'nbrOTU'}) {
								foreach my $e (keys %{$res_norm{$parsing}{$o}{$j}{'OTU'}}) {
									if ($res_norm{$parsing}{$o}{$j}{'OTU'}{$e} != 0) {
										$t2=$res_norm{$parsing}{$o}{$j}{'OTU'}{$e}/$res_norm{$parsing}{$o}{$j}{'nbrSEQ'};
										$t2 = $t2 * log($t2);
										$sh += $t2;
									}
								}
				
								$sh = $sh*(-1);
								$res_norm{$parsing}{$o}{$j}{'shannon'} = sprintf("%.2f", $sh);
								print OO "\t$res_norm{$parsing}{$o}{$j}{'nbrSEQ'}\t$res_norm{$parsing}{$o}{$j}{'nbrOTU'}\t$res_norm{$parsing}{$o}{$j}{'chao'}\t$res_norm{$parsing}{$o}{$j}{'shannon'}\t$res_norm{$parsing}{$o}{$j}{'cov'}\n";
							}
							
							else {
								print OO "\t0\t0\t0\t0\t0\n";
							}
						}
						else {
							print OO "\tNA\tNA\tNA\tNA\tNA\n";
						}


					######
					}
					foreach my $jj (sort keys %{$leveltax{$parsing}{$j}}) { 
						my $sh=0; my $t2;
						######
						if (exists $res_norm{$parsing}{$o}) {
						#####
							if (exists $res_norm{$parsing}{$o}{$j}{$jj}{'nbrOTU'}) {
								foreach my $e (keys %{$res_norm{$parsing}{$o}{$j}{$jj}{'OTU'}}) {
									if ($res_norm{$parsing}{$o}{$j}{$jj}{'OTU'}{$e} != 0) {
										$t2=$res_norm{$parsing}{$o}{$j}{$jj}{'OTU'}{$e}/$res_norm{$parsing}{$o}{$j}{$jj}{'nbrSEQ'};
										$t2 = $t2 * log($t2);
										$sh += $t2;
									}
								}
										
								$sh = $sh*(-1);
								$res_norm{$parsing}{$o}{$j}{$jj}{'shannon'} = sprintf("%.2f", $sh);
		
								print OO "\t$res_norm{$parsing}{$o}{$j}{$jj}{'nbrSEQ'}\t$res_norm{$parsing}{$o}{$j}{$jj}{'nbrOTU'}\t$res_norm{$parsing}{$o}{$j}{$jj}{'chao'}\t$res_norm{$parsing}{$o}{$j}{$jj}{'shannon'}\t$res_norm{$parsing}{$o}{$j}{$jj}{'cov'}\n";
							}
							else {
								print OO "\t0\t0\t0\t0\t0\n";
							}
						}
						else {
							print OO "\tNA\tNA\tNA\tNA\tNA\n";
						}
					}	
				}
			#}
			close OO;
			`paste $NGS_id_Results"/grp_tax"$parsing"_"$o"_norm_tmp_all" $NGS_id_Results"/res_"$o"_"$parsing"_norm_tmp_all" > $NGS_id_Results/taxonomic_distribution_"$o"_"$parsing"_norm.txt`;

		#}

	## 		`paste  $NGS_id_Results"/"*"_tmp_all" > $NGS_id_Results/taxonomic_distribution_"$parsing".txt`;

		`rm $NGS_id_Results/*_tmp_*`;
		
	}
	 
	###############################################################

	foreach my $k (keys %NbrSeqClean) {
		print "$k --- $NbrSeqClean{$k}\n"
	}

}

my @normtypes = ("", "_norm");
my @parsingmeth = ("LCA", "NN");

my $head = `head -n 1 $NGS_id_Results/OTU_distribution_tax.txt`;
my @samples = split("\t", $head);
shift(@samples); shift(@samples);
pop(@samples); pop(@samples); 
chomp(@samples);

foreach my $parsing (@parsingmeth) {
	foreach my $ntype (@normtypes) {

		my $cpt = 1;
		
		foreach my $env (@samples) {	
			my $coden = "";
			if ($cpt<10) {
				$coden = "0";
			} 
			#print $env."\n";
			if (!( -e "tax$parsing$ntype"."_tmp0" ) )  {
				`awk -F\"\t\" '{print \$1"\t"\$2;}' $NGS_id_Results/taxonomic_distribution_$env"_"$parsing$ntype.txt > $NGS_id_Results/tax$parsing$ntype"_"tmp0`;
			}
			`awk -F\"\t\" '{print \$3"\t"\$4"\t"\$5"\t"\$6"\t"\$7"\t"\$8;}' $NGS_id_Results/taxonomic_distribution_$env"_"$parsing$ntype.txt > $NGS_id_Results/tax$parsing$ntype"_tmp"$coden$cpt`;
			`rm $NGS_id_Results/taxonomic_distribution_$env"_"$parsing$ntype.txt`; 
			$cpt++;
		}
		`paste $NGS_id_Results/tax$parsing$ntype"_"tmp* > $NGS_id_Results/taxonomic_distribution_$parsing$ntype.txt`;
		`rm $NGS_id_Results/tax$parsing$ntype"_"tmp*`;
	}
}

